/***************************************************************************
 *
 * Copyright (C) 2001 International Business Machines
 * All rights reserved.
 *
 * This file is part of the GPFS mmfslinux kernel module.
 *
 * Redistribution and use in source and binary forms, with or without 
 * modification, are permitted provided that the following conditions 
 * are met:
 *
 *  1. Redistributions of source code must retain the above copyright notice, 
 *     this list of conditions and the following disclaimer. 
 *  2. Redistributions in binary form must reproduce the above copyright 
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution. 
 *  3. The name of the author may not be used to endorse or promote products 
 *     derived from this software without specific prior written
 *     permission. 
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 *************************************************************************** */
/*
 * Linux implementation of I/O buffers
 *
 * Contents:
 *   static struct cxiKernelIOBufferDesc_t* kibdAlloc
 *   static void kibdFree
 *   static void deallocKernelIOBufferDesc
 *   static int allocKernelIOBufferDesc
 *   KibdModuleInit
 *   KibdModuleTerm
 *   cxiKibdPin
 *   cxiKibdUnpin
 *   cxiKibdUnpinAll
 *   cxiKibdSplit
 *   cxiKibdMerge
 *
 *   cxiAttachIOBuffer
 *   cxiDetachIOBuffer
 *   cxiUXfer
 *   cxiKXfer
 *   cxiKZero
 *   cxiMapDiscontiguousRW
 *   cxiUnmapDiscontiguousRW
 *   cxiMapContiguousRO
 *   cxiUnmapContiguousRO
 *   BHioDone
 *   cxiStartIO
 *   cxiWaitIO
 *   cxiKDoIO
 *   GetDiskInfoX
 */

/*
 * $Id: cxiIOBuffer.c,v 1.22.2.4 2002/07/17 14:21:00 gjertsen Exp $
 *
 * $Log: cxiIOBuffer.c,v $
 * Revision 1.22.2.4  2002/07/17 14:21:00  gjertsen
 * Changes for recent kernel version updates with RH 7.2 and 7.3.
 *
 * Revision 1.22.2.3  2002/05/21 21:44:58  dcraft
 * Pull GPFS 1.2.1 up to kernel 2.4.18.
 * mmfsfuncs.Linux must be distributed with /usr/lpp/mmfs/src
 * on developerworks.
 *
 * Revision 1.22.2.2  2001/12/18 14:33:10  mcnabb
 * kxMapPrivate now works on 2.4.2 and later kernels.
 *
 * Revision 1.22.2.1  2001/12/12 17:00:23  mcnabb
 * Map page pool MAP_PRIVATE and VM_DONTCOPY so child copy on write
 * semantics don't occur and we can get rid of flush_inode_pages().
 *
 * Revision 1.22  2001/10/25 23:50:31  wyllie
 * Decrease wait time in cxiWaitIO from 50ms to 10ms.  Add counter for the
 * total number of waits in this loop.
 *
 * Revision 1.21  2001/10/23 22:22:23  wyllie
 * Follow-up on Raleigh defect 3666: Make sure that iodone handlers do not touch
 * buffer_heads that have been deallocated.
 *
 * Revision 1.20  2001/10/17 19:58:23  dcraft
 * defect 351856 avoid flush_inode_pages() if host is null
 *
 * Revision 1.19  2001/10/09 17:45:26  dcraft
 * Fixes for running on 2.4.9-ac kernel series. (behind ifdefs)
 *
 * Revision 1.18  2001/10/06 01:26:32  schmuck
 * Put back some ifdef'ed performance tracing code that was lost in Rev 1.12.
 *
 * Revision 1.17  2001/10/05 22:09:05  dcraft
 * Defect 350418.  Remove users of cxiKernelIOBufferDescs before
 * destroying the slab cache.  Otherwise the cache isn't destroyed
 * and an OOPS occurs when an attempt is made to recreate the
 * cache on the next module load.
 *
 * Revision 1.16  2001/10/03 19:58:18  wyllie
 * Call flush_inode_pages after pinning a page pool buffer, so that pages
 * in the page pool do not wind up in the Linux page cache.  A disclaim of
 * pages in such a state does not really free the pages, making this call
 * necessary.  The MAP_SHARED flag on the mmap call that created the
 * underlying memory region set up the behavior that the call to
 * flush_inode_pages undoes.
 *
 * Revision 1.15  2001/10/01 14:43:28  dcraft
 * Allow compiling portability layer for 2.4.7-10
 *
 * Revision 1.14  2001/09/25 19:03:49  dcraft
 * OPEN_OVERLOAD no longer needed.  Move cxiKiobuf_t into cxiIOBuffer.C
 *
 * Revision 1.13  2001/09/25 18:06:06  gjertsen
 * Fix previous change for IA64.
 *
 * Revision 1.12  2001/09/22 20:07:58  dcraft
 * Remove kiobufs from cxiKernelIODescriptor_t.  Use temporary
 * kiobufs for map/unmap.   Remove dead code and dead comments
 * in portability layer and update readmes and license.
 * Fix traceback to appear in mmfs.log file.
 *
 * Revision 1.10  2001/09/10 16:27:37  wyllie
 * Do not use brw_kiovec to do I/O.  Instead, build one buffer_head per
 * page spanned by the I/O, and submit them using generic_make_request.
 * This avoids breaking up large I/Os into 64K chunks done synchronously,
 * and also avoids apparent bugs in the kiobuf I/O code.  CPU for
 * coalescing is also reduced, since the old method used 8 times as many
 * buffer_heads (one per sector instead of one per page).
 *
 * Revision 1.9  2001/09/06 12:48:18  gjertsen
 * Remove IA64 ifdef that stubbed GetDiskInfoX.
 *
 * Revision 1.7  2001/08/09 21:11:16  dcraft
 * Modifications to allow running on latest Redhat 7.1 update
 * Kernel version 2.4.3-12.
 * Requires checkout of new site.mcr.proto
 *
 * Revision 1.6  2001/07/10 16:18:04  wyllie
 * Additional debug code to catch corruption of the local kiobuf used for
 * small I/Os.
 *
 * Revision 1.5  2001/06/25 21:23:12  tee
 * Make direct I/O work on Linux.
 *
 * Revision 1.4  2001/05/23 17:06:17  wyllie
 * Remove debugging code under ifdef JCW_DEBUG now that bug is fixed.  Raise
 * trace levels for detailed map/unmap discontig traces.
 *
 * Revision 1.3  2001/05/23 16:48:52  wyllie
 * Add asserts to catch a bug (illegal address for kiobuf.end_io callback)
 *
 * Revision 1.2  2001/05/02 20:12:35  wyllie
 * Fix for defect 338323 (DBGASSERT(pageP != NULL)).  In cxiUnmapDiscontiguousRW,
 * need to deal with case that the buffer was last mapped by MapContiguousBuffer.
 *
 * Revision 1.1  2001/04/10 21:10:44  wyllie
 * Convert cxiIOBuffer.C from C++ to C.
 *
 * Revision 1.33  2001/04/04 15:52:45  wyllie
 * Add a comment
 *
 * Revision 1.32  2001/03/12 19:53:10  schmuck
 * Add ifdef to allow using IDE disks on a kernel with KIO enabled.
 *
 * Revision 1.31  2001/03/09 18:10:50  schmuck
 * More SMB oplock code.
 *
 * Revision 1.30  2001/03/08 01:24:17  dcraft
 * Syntactic sugar for more strict g++ compiler.  No functional change.
 *
 * Revision 1.29  2001/03/06 17:10:47  wyllie
 * Add code to catch corruption of sdata structures used in unmapDiscontiguousRW
 *
 * Revision 1.28  2001/02/02 16:20:50  wyllie
 * Change trace level to catch a bug in unmapDiscontiguousRW
 *
 * Revision 1.27  2001/01/25 18:36:37  wyllie
 * Remove incomplete I/O code under ifdef BIG_BRW_KIOVEC
 *
 * Revision 1.26  2001/01/17 16:59:54  wyllie
 * Get this to compile with if LINUX_RW_KIO is defined
 *
 * Revision 1.25  2001/01/13 00:04:35  dcraft
 * Changes for Redhat 7.0 and 2.4.0 kernel
 *
 * Revision 1.24  2000/12/18 13:53:13  gjertsen
 * More cleanup of comments/documentation.
 *
 * Revision 1.23  2000/12/15 13:56:34  gjertsen
 * Clean up documentation.
 *
 * Revision 1.22  2000/12/14 21:58:11  wyllie
 * Remove KIOBUF_STUFF ifdefs
 *
 * Revision 1.21  2000/11/07 22:10:26  wyllie
 * Do not complain about absence of CONFIG_HIGHMEM, since it must not be defined
 * to compile for machines with small memory.
 *
 * Revision 1.20  2000/11/06 19:56:08  gjertsen
 * Linux code cleanup and put in build safeguards.
 *
 * Revision 1.19  2000/11/03 20:26:55  dcraft
 * Build SMP, UP, NOHIGHMEM, and 4G memory variations of mmfslinux in
 * one pass.   Names are mmfslinux_UP1G, mmfslinux_UP4G, mmfslinux_SMP1G,
 * mmfslinux_SMP4G.
 *
 * Revision 1.18  2000/11/02 19:46:17  gjertsen
 * Linux code split. Pull out NBD stuff.
 *
// Revision 1.17  2000/10/26  20:52:25  gjertsen
// Purge out ugly USE_CWRAPPERS and export module symbols explicitly
// as the default (in IA64 safe manner).
//
// Revision 1.16  2000/10/24  14:04:41  gjertsen
// Clean up linux module specific code so that gpfs can be
// compiled in the kernel (to allow IA64 kernel debugging).
//
// Revision 1.15  2000/10/14  00:17:03  wyllie
// Make sure that CONFIG_HIGHMEM is on in the kernel build if KIOBUF_STUFF is
// being used.  Otherwise, kmap() calls will fail on machines with large
// memory for pages above 1G on i386.
//
// Revision 1.14  2000/10/04  19:52:47  gjertsen
// Only include smplock.h for SMP configuration (to allow use of uniproc kernel).
//
// Revision 1.13  2000/09/26  23:22:07  wyllie
// Experiments with other methods for doing I/O.  Not ready for prime time.
//
// Revision 1.12  2000/09/15  20:37:48  wyllie
// Make detach work even if attach was not called, instead of asserting.
// Under control of the new build flag LKCD_KIOBUFS, change calling convention
// for map_user_kiobuf.  The signature of this routine changes when the
// Linux kernel crash dump patch is applied.  Add additional error checking
// on real I/O path.
//
// Revision 1.11  2000/08/29  18:31:49  dcraft
// Produce mmfs module.
//
// Revision 1.10  2000/08/28  14:13:18  gjertsen
// Need to export all kernel symbols explicitly in IA64
// Linux due to bug with insmod.
//
// Revision 1.9  2000/08/21  22:15:41  dcraft
// Create cxiDev_t type that is based on user level dev_t.  Provide
// mapping functions between kernel, user, and inode device field.
// Fix NLS yes/no query.
//
// Revision 1.8  2000/08/14  06:11:35  sharma
// #define virtual_xxx and #undef virtual for compiling with KIOBUF_STUFF
//
 * Revision 1.7  2000/08/11 20:13:33  eshel
 * Fixes for compile and load on IA64.
 *
// Revision 1.6  2000/08/10  00:00:52  wyllie
// Buffer management for Linux, phase III: physical disk I/O.  I/O is done
// synchronously by kxStartIO.  For well-aligned buffers (full blocks starting
// at offset 0), uses the kiobufs already built.  For other I/O, builds a
// temporary kiobuf to pass to brw_kiovec.
//
// Revision 1.5  2000/08/02  14:55:54  dcraft
// Typedef Ptrdiff correction from int to long for 64bit
// Correct quota flags for root user override
//
// Revision 1.4  2000/08/01  21:24:44  wyllie
// Make unmapDiscontiguousRW use the correct number of pages.
//
// Revision 1.3  2000/08/01  17:07:35  wyllie
// Buffer management for Linux, phase II: for each Buffer created by the page
// pool manager, create a shadow of the buffer in the kernel that contains
// kiobufs pointing to the Linux struct page objects for each page in the data
// area of the Buffer.  Use these mappings to implement uXfer, kXfer, etc.
// Not yet fully functional; requires a -D flag to activate.
//
// Revision 1.2  2000/07/11  16:35:10  wyllie
// Use cxiUio_t instead of struct uio.  Use cxiUiomove instead of uiomove.  Use
// CXI_READ instead of UIO_READ, etc.
//
// Revision 1.1  2000/06/30  16:20:58  wyllie
// Buffer management for Linux, phase I: abstract kernel mapping of page pool
// objects into the cxiIOBuffer_t class.  Define interfaces for manipulating
// such I/O buffers from the kernel.  Change all kernel users of Buffer objects
// to use the new interfaces, which are fully implemented for AIX and stubbed
// out for Linux.
//
 */

#include <Shark-gpl.h>

#include <linux/module.h>
#include <linux/string.h>
#include <linux/locks.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/blkdev.h>
#include <linux/fs.h>
#include <linux/iobuf.h>
#include <linux/smp_lock.h>
#include <linux/kernel_stat.h>

#include <Trace.h>
#include <cxiSystem.h>
#include <linux2gpfs.h>
#include <cxiIOBuffer.h>
#include <cxiAtomic.h>
#include <linux/mman.h>

/* Count of number of delays in busy wait loop in cxiWaitIO.  Kept as an
   atomic_t; it is reset to zero by KibdModuleInit when the module loads. */
atomic_t cxiWaitIONDelays;

#ifdef GPFS_ARCH_IA64
/* Work around IA64 kernel bug under handle_mm_fault (for map_user_kiobuf).
   Bring in pages and make sure they are marked w/ write & dirty.

   TOUCH_PAGES(BUF, LEN) stores a zero byte at BUF[0], BUF[PAGE_SIZE], ...
   for every offset below LEN.  On kernels where the workaround is not
   needed (and on all non-IA64 builds) the macro expands to nothing. */
#if LINUX_KERNEL_VERSION >= 2040900
#define TOUCH_PAGES(BUF, LEN)
#else
/* broken at least at level 2.4.3-12 */
/* Wrapped in do { } while (0) so the macro behaves as a single statement
   (safe under an unbraced if/else).  Arguments are parenthesized and each
   is evaluated exactly once; the local names carry a trailing underscore so
   they cannot capture identifiers used in the caller's arguments. */
#define TOUCH_PAGES(BUF, LEN) \
  do { \
    long touchOff_; \
    long touchLen_ = (long)(LEN); \
    char *touchP_ = (char *)(BUF); \
    for (touchOff_ = 0; touchOff_ < touchLen_; touchOff_ += PAGE_SIZE) \
      touchP_[touchOff_] = 0; \
  } while (0)
#endif
#else
#define TOUCH_PAGES(BUF, LEN)
#endif /* GPFS_ARCH_IA64 */

/* Returns a page pointer from a cxiKernelIOBufferDesc_t.
 * The INDEX of the page to return is relative to the
 * KIBDP supplied.  For instance a KIBD may only contain
 * twenty pages.  If you supply a KIBD and an index of twenty
 * (index starts from zero) then we'll move to the next KIBD
 * in the chain and update the INDEX to be zero.  Thus INDEX,
 * KIBDP, and PAGEP may all be updated by this macro, so each
 * argument must be a modifiable lvalue.
 *
 * If the chain is exhausted before INDEX is reached, KIBDP is
 * left NULL and PAGEP is set to NULL.
 *
 * The body is wrapped in do { } while (0) so that an invocation
 * followed by a semicolon is exactly one statement and can be used
 * safely under an unbraced if/else.
 */
#define KIBD_GET_PAGE(KIBDP, INDEX, PAGEP)                \
  do                                                      \
  {                                                       \
    while ((KIBDP) && (INDEX) >= (KIBDP)->kibdPages)      \
    {                                                     \
      (INDEX) -= (KIBDP)->kibdPages;                      \
      (KIBDP) = (KIBDP)->kibdNextP;                       \
    }                                                     \
    if (KIBDP)                                            \
      (PAGEP) = (struct page *)(KIBDP)->maplist[(INDEX)]; \
    else                                                  \
      (PAGEP) = NULL;                                     \
  } while (0)

/* From iobuf.h struct kiobuf map_array[]
 * Number of pages contained in one kiobuf 
 */
#define PAGES_PER_KIOBUF KIO_STATIC_PAGES

/* Number of temporary kiobufs held in kiobufPool: 2*NR_CPUS entries */
#define NUM_KIOBUFS_POOL (2*NR_CPUS)

/* Local name for the Linux struct kiobuf */
typedef struct kiobuf cxiKiobuf_t;

/* A pool of temporary kiobufs which are used for mapping and unmapping.
   The unused kiobufs are kept as a stack in kiobufPP[0..top]; all fields
   are read and written with 'lock' held (see kiobufPoolGet/kiobufPoolPut). */
typedef struct KiobufPool_t
{
  int top;              /* top element of the unused stack of kiobufs;
                           NUM_KIOBUFS_POOL-1 when all are free, negative
                           when none are available */
  int waiting;          /* number of threads waiting for a kiobuf */
  Boolean shutdown;     /* waiters should no longer try for a kiobuf */
  Simple_lock lock;  
  cxiWaitEvent_t wait;  /* waiters in kiobufPoolGet sleep on this event */

  cxiKiobuf_t *kiobufPP[NUM_KIOBUFS_POOL]; /* array of temporary kiobufs */

/* modification in the -ac patch series */
#if LINUX_KERNEL_VERSION >= 2040900
  /* Zeroed by kiobufPoolInit and passed to alloc_kiovec_sz/free_kiovec_sz
     on the kernel levels that use those calls */
  int nbhs[NUM_KIOBUFS_POOL];
#endif

} KiobufPool_t;
  
/* The single pool instance; set up by kiobufPoolInit */
KiobufPool_t kiobufPool;
  
/* Set up the global pool of temporary kiobufs.  All NUM_KIOBUFS_POOL
   kiobufs are allocated in a single call; the variant of the kiovec
   allocator depends on the architecture and kernel level.  Returns 0 on
   success.  If the allocator fails, its return code is passed back and the
   remaining pool fields are left untouched. */
static int
kiobufPoolInit()
{
  int allocRc;

#if defined(GPFS_ARCH_POWER) || LINUX_KERNEL_VERSION >= 2041800 || \
    LINUX_KERNEL_VERSION < 2040900
  /* Plain allocator: POWER builds, 2.4.18 and later, and pre-2.4.9 */
  allocRc = alloc_kiovec(NUM_KIOBUFS_POOL, kiobufPool.kiobufPP);
#else
  /* 2.4.9 up to (not including) 2.4.18: sized allocator, with an
     all-zero per-kiobuf size array */
  memset(kiobufPool.nbhs, 0, sizeof(kiobufPool.nbhs));
  allocRc = alloc_kiovec_sz(NUM_KIOBUFS_POOL, kiobufPool.kiobufPP,
                            kiobufPool.nbhs);
#endif

  if (allocRc != 0)
    return allocRc;

  /* Synchronization objects first, then mark every kiobuf as available */
  simple_lock_init(&kiobufPool.lock);
  cxiWaitEventInit(&kiobufPool.wait);

  kiobufPool.shutdown = false;
  kiobufPool.waiting = 0;
  kiobufPool.top = NUM_KIOBUFS_POOL - 1;

  return 0;
}

/* Shut the kiobuf pool down.  Under the pool lock: flag the shutdown, wake
   any threads sleeping in kiobufPoolGet so they give up, and hand all
   kiobufs back to the kernel using the free routine that matches the
   allocator chosen in kiobufPoolInit. */
static void
kiobufPoolTerm()
{
  simple_lock(&kiobufPool.lock);

  kiobufPool.shutdown = true;
  if (kiobufPool.waiting != 0)
    cxiWaitEventBroadcast(&kiobufPool.wait);

  /* Every kiobuf is expected to be back on the unused stack by now */
  DBGASSERT(kiobufPool.top == (NUM_KIOBUFS_POOL - 1));

#if defined(GPFS_ARCH_POWER) || LINUX_KERNEL_VERSION >= 2041800 || \
    LINUX_KERNEL_VERSION < 2040900
  free_kiovec(NUM_KIOBUFS_POOL, kiobufPool.kiobufPP);
#else
  /* 2.4.9 up to (not including) 2.4.18 */
  free_kiovec_sz(NUM_KIOBUFS_POOL, kiobufPool.kiobufPP, kiobufPool.nbhs);
#endif

  simple_unlock(&kiobufPool.lock);
}

/* get a temporary kiobuf */
static cxiKiobuf_t *
kiobufPoolGet()
{
  cxiKiobuf_t *kiobP = NULL;

  simple_lock(&kiobufPool.lock);

  while (kiobufPool.top < 0)
  {
    kiobufPool.waiting++;
    e_sleep_thread(&kiobufPool.wait, &kiobufPool.lock, LOCK_HANDLER);
    kiobufPool.waiting--;

    if (kiobufPool.shutdown)
      goto xerror;
  }

  kiobP = kiobufPool.kiobufPP[kiobufPool.top];
  kiobufPool.kiobufPP[kiobufPool.top] = NULL; /* NULL for debug */
  kiobufPool.top--;

#if LINUX_KERNEL_VERSION >= 2040710 || NFS4_LINUX_2_4_4
  /* this makes me ill */
  kiobP->nr_pages = 0;
  DBGASSERT(kiobP->array_len == PAGES_PER_KIOBUF);
#if LINUX_KERNEL_VERSION < 2041804
  DBGASSERT(kiobP->maplist == kiobP->map_array);
#endif
  kiobP->offset = 0;
  kiobP->length = 0;
  DBGASSERT(kiobP->locked == 0);
#else
  kiobuf_init(kiobP);
#endif

  
xerror:
  simple_unlock(&kiobufPool.lock);
  return kiobP;
}

/* Return a temporary kiobuf to the pool's unused stack and, if any thread
   is waiting in kiobufPoolGet, wake one of them. */
static void
kiobufPoolPut(cxiKiobuf_t *kiobP)
{
  simple_lock(&kiobufPool.lock);

  /* Push onto the stack */
  kiobufPool.top += 1;
  DBGASSERT(kiobufPool.top < NUM_KIOBUFS_POOL);
  kiobufPool.kiobufPP[kiobufPool.top] = kiobP;

  if (kiobufPool.waiting != 0)
    cxiWaitEventSignal(&kiobufPool.wait);

  simple_unlock(&kiobufPool.lock);
}
    
/* Spin lock protecting list of all top-level cxiKernelIOBufferDesc_t's.
   Using a static initializer here (spinlock_t KibdLock = SPIN_LOCK_UNLOCKED)
   does not work, because SPIN_LOCK_UNLOCKED contains a cast to type spinlock_t.
   In C++, (but not in C), this causes KibdLock to be put in the bss section,
   and code to be generated to perform the initialization.  Unfortunately,
   this initialization code does not get called, because kernel modules do
   not have the full C++ environment established.
   The lock is therefore initialized at run time by spin_lock_init() in
   KibdModuleInit. */
spinlock_t KibdLock;

/* Static pointer to slab allocator for cxiKernelIOBufferDesc_t's.
   Created in KibdModuleInit and destroyed in KibdModuleTerm. */
kmem_cache_t* KibdCacheP = NULL;

/* Static head of doubly-linked list of top-level cxiKernelIOBufferDesc_t's.
   The list is protected by KibdLock.  cxiKibdPin pushes new descriptors at
   the head; entries are linked through their gblNextP/gblPrevP fields. */
struct cxiKernelIOBufferDesc_t* KibdGblHeadP = NULL;

/* Group of Linux buffer_heads allocated together for a multi-page I/O */
#define BUFFER_HEADS_PER_CHUNK 21   /* Two chunks almost fill a page */
struct cxiBufHeadChunk_t
{
  /* Next and previous chunks of buffers used for an I/O.  The list is
     circular. */
  struct cxiBufHeadChunk_t* bhcNextP;
  struct cxiBufHeadChunk_t* bhcPrevP;

  /* Number of buffer_heads used in this chunk */
  int nBHUsed;

  /* Number of buffer_heads in this chunk that have been submitted, but
     whose iodone handler has not finished running.  Always updated
     with atomic operations, since this field is accessed asynchronously
     from interrupt level. */
  atomic_t nBHActive;

  /* Space for buffer_heads, one per page touched by an I/O operation */
  struct buffer_head bh[BUFFER_HEADS_PER_CHUNK];
};

/* Static pointer to slab allocator for cxiBufHeadChunk_t's.
   Created in KibdModuleInit and destroyed in KibdModuleTerm. */
kmem_cache_t* BhcCacheP = NULL;


/* Obtain a cxiKernelIOBufferDesc_t from its slab cache and reset it to an
   empty descriptor: no virtual address, zero page counts, no chain or
   global-list links, and every maplist slot NULL.  Returns NULL if the
   slab allocation fails. */
static struct cxiKernelIOBufferDesc_t* kibdAlloc()
{
  struct cxiKernelIOBufferDesc_t* newP;
  int slot;

  newP = (struct cxiKernelIOBufferDesc_t*)
           kmem_cache_alloc(KibdCacheP, SLAB_KERNEL);
  TRACE1(TRACE_KSVFS, 14, TRCID_KIBD_NEW,
         "kibdAlloc: allocated cxiKernelIOBufferDesc_t at 0x%lX\n", newP);
  if (newP == NULL)
    return NULL;

  /* Links */
  newP->gblPrevP = NULL;
  newP->gblNextP = NULL;
  newP->kibdNextP = NULL;

  /* Mapping state */
  newP->kibdTotalPages = 0;
  newP->kibdPages = 0;
  newP->kibdVaddr = NULL;

  slot = 0;
  while (slot < PAGES_PER_KIBD)
  {
    newP->maplist[slot] = NULL;
    slot++;
  }

  return newP;
}


/* Hand one cxiKernelIOBufferDesc_t back to the slab cache it was allocated
   from.  Only this single descriptor is released; chained descriptors and
   the pages it refers to are not touched here. */
static void kibdFree(struct cxiKernelIOBufferDesc_t* kibdP)
{
  void* objP = kibdP;

  TRACE1(TRACE_KSVFS, 14, TRCID_KIBD_DELETE,
         "kibdFree: freeing cxiKernelIOBufferDesc_t at 0x%lX\n", kibdP);
  kmem_cache_free(KibdCacheP, objP);
}


/* Destroy a cxiKernelIOBufferDesc_t object.
 *
 * kibdP is the head of a chain of descriptors linked through kibdNextP.
 * The routine walks every page recorded in the chain, collects the page
 * pointers into a temporary kiobuf in batches of at most PAGES_PER_KIOBUF,
 * and calls unmap_kiobuf() on each batch (presumably dropping the page
 * references taken when the buffer was mapped -- confirm against the
 * kernel's unmap_kiobuf).  Each descriptor is returned to the slab cache
 * once the walk has moved past it.
 *
 * If no temporary kiobuf can be obtained (kiobufPoolGet returns NULL only
 * when the pool has been shut down) the routine asserts and returns without
 * releasing anything.
 */
static void 
deallocKernelIOBufferDesc(struct cxiKernelIOBufferDesc_t* kibdP)
{
  cxiKiobuf_t *kiobP;
  struct cxiKernelIOBufferDesc_t *kibdPrevP;
  struct page *pageP;
  int pageIndex = 0;     /* page index relative to the current descriptor */
  int kiobufIndex = 0;   /* next free slot in the temporary kiobuf */
  int pageTotal = kibdP->kibdTotalPages;  /* counted down as descriptors
                                             are freed; should reach 0 */

  kiobP = kiobufPoolGet();
  if (kiobP == NULL)
  {
    DBGASSERT(kiobP != NULL);
    return;
  }

  kiobP->nr_pages = 0;
  for (;;)
  {
    /* Remember the current descriptor: KIBD_GET_PAGE may advance kibdP to
       the next one in the chain (and rebase pageIndex) */
    kibdPrevP = kibdP;
    KIBD_GET_PAGE(kibdP, pageIndex, pageP);
    /* NULL page: either the chain is exhausted (kibdP is now NULL) or an
       unused maplist slot was reached */
    if (pageP == NULL)
      break;
    
    /* Temporary kiobuf is full: unmap this batch and start a new one */
    if (kiobufIndex >= PAGES_PER_KIOBUF)
    {
      unmap_kiobuf(kiobP);
      kiobufIndex = 0;
      kiobP->nr_pages = 0;
    }
      
    kiobP->maplist[kiobufIndex] = pageP;
    kiobP->nr_pages++;

    /* The walk stepped into a new descriptor, so the previous one is no
       longer needed and can be freed.
       NOTE(review): only kibdPrevP is freed here; if KIBD_GET_PAGE ever
       skipped over more than one descriptor in a single step, the ones in
       between would not be freed -- confirm that cannot happen. */
    if (kibdPrevP != kibdP)
    {
      TRACE4(TRACE_KSVFS, 11, TRCID_DEALLOC_KIBD_1,
             "deallocKernelIOBufferDesc: kibdP 0x%lX vaddr 0x%lX kibdPages %d "
             "kibdNextP 0x%lX\n", kibdPrevP, kibdPrevP->kibdVaddr, 
             kibdPrevP->kibdPages, kibdP);
      pageTotal -= kibdPrevP->kibdPages;
      kibdFree(kibdPrevP);
    }

    kiobufIndex++;
    pageIndex++;
  }

  /* Free the last descriptor visited when the loop ended by walking off
     the end of the chain */
  if (kibdPrevP != kibdP && kibdPrevP)
  {
    TRACE4(TRACE_KSVFS, 11, TRCID_DEALLOC_KIBD_2,
           "deallocKernelIOBufferDesc: kibdP 0x%lX vaddr 0x%lX kibdPages %d "
           "kibdNextP 0x%lX\n", kibdPrevP, kibdPrevP->kibdVaddr, 
           kibdPrevP->kibdPages, kibdP);
    pageTotal -= kibdPrevP->kibdPages;
    kibdFree(kibdPrevP);
  }

  /* Unmap the final, partially filled batch */
  if (kiobP->nr_pages)
    unmap_kiobuf(kiobP);

  /* Make sure all the constituent cxiKernelIODesc_t page counts added
   * up to the total page count in the first cxiKernelIODesct_t 
   */
  DBGASSERT(pageTotal == 0);

  kiobufPoolPut(kiobP);
}


/* Create a cxiKernelIOBufferDesc_t that maps the given region of
 * the user address space of this process.  The buffer virtual address
 * must be on a page boundary.
 */
static int 
allocKernelIOBufferDesc(char* vaddr, int nPages,
                        struct cxiKernelIOBufferDesc_t** kibdPP)
{
  cxiKiobuf_t *kiobP;
  struct cxiKernelIOBufferDesc_t* kibdP;
  struct cxiKernelIOBufferDesc_t* kibdPrevP = NULL;
  struct cxiKernelIOBufferDesc_t* kibdHeadP = NULL;
  int rc;
  int kibdIndex;
  int kibdPages;
  int mapPages;
  int mapIndex;
  int totalPages = 0;
  int j;
  struct page * pageP;
  struct address_space * addrSpaceP;

  kiobP = kiobufPoolGet();
  if (kiobP == NULL)
  {
    rc = -ENOMEM;
    goto errorExit;
  }

  /* Validate parameters */
  DBGASSERT(((IntPtr)vaddr & (PAGE_SIZE-1)) == 0);

  if (nPages)
  {
    kibdHeadP = kibdP = kibdAlloc();
    if (kibdP == NULL)
    {
      rc = -ENOMEM;
      goto errorExit;
    }
    kibdP->kibdVaddr = vaddr;
  }

  while (nPages) 
  {
    mapPages = nPages;
    if (mapPages > PAGES_PER_KIOBUF)
      mapPages = PAGES_PER_KIOBUF;

    kiobP->nr_pages = 0;
    TOUCH_PAGES(vaddr, (size_t)mapPages * PAGE_SIZE);
    rc = map_user_kiobuf(READ, kiobP, (unsigned long)vaddr, 
                         (size_t)mapPages * PAGE_SIZE);
    if (rc != 0)
      goto errorExit;

    for (mapIndex = 0; mapIndex < mapPages; mapIndex += kibdPages)
    {
      kibdIndex = kibdP->kibdPages;
      if (kibdIndex >= PAGES_PER_KIBD)
      {
        kibdPrevP = kibdP;
        kibdP = kibdAlloc();
        if (kibdP == NULL)
        {
          rc = -ENOMEM;
          goto errorExit;
        }
        kibdPrevP->kibdNextP = kibdP;
        kibdP->kibdVaddr = vaddr;
        kibdIndex = 0;

        TRACE4(TRACE_KSVFS, 11, TRCID_ALLOC_KIBD_1,
               "allocKernelIOBufferDesc: kibdP 0x%lX vaddr 0x%lX kibdPages %d "
               "kibdNextP 0x%lX\n", kibdPrevP, kibdPrevP->kibdVaddr, 
               kibdPrevP->kibdPages, kibdP);
      }

      kibdPages = mapPages - mapIndex;
      if (kibdPages > (PAGES_PER_KIBD - kibdIndex))
        kibdPages = PAGES_PER_KIBD - kibdIndex;
     
      for (j = 0; j < kibdPages; j++, kibdIndex++)
        kibdP->maplist[kibdIndex] = (char *)kiobP->maplist[mapIndex + j];

      kibdP->kibdPages += kibdPages;
      totalPages += kibdPages;
      vaddr += kibdPages * PAGE_SIZE;
    }
    nPages -= mapPages;
  }
  TRACE4(TRACE_KSVFS, 11, TRCID_ALLOC_KIBD_2,
         "allocKernelIOBufferDesc: kibdP 0x%lX vaddr 0x%lX kibdPages %d "
         "totalPages %d\n", kibdP, kibdP->kibdVaddr, kibdP->kibdPages,
         totalPages);

  /* Total page count is kept only in the first one */
  kibdHeadP->kibdTotalPages = totalPages;

  /* Remove pages from the page cache.  These pages shouldn't be mapped
   * to any inode, otherwise we won't be able to disclaim them.  We 
   * did have a problem where MAP_SHARED semantics would cause this.
   * (see kxMapPrivate)
   */
  pageP = (struct page *)kibdHeadP->maplist[0];
  DBGASSERT(pageP != NULL);
  addrSpaceP = pageP->mapping;

#ifdef NOKXMAP
  /* the old way of doing things */
  if (addrSpaceP != NULL)
  {
    TRACE4(TRACE_KSVFS, 11, TRCID_ALLOC_KIBD_UNCACHE,
           "allocKernelIOBufferDesc: kibdHeadP 0x%lX pageP 0x%lX count %d "
           "addrSpaceP 0x%lX\n", kibdHeadP, pageP, atomic_read(&pageP->count),
           addrSpaceP);
#ifndef GPFS_ARCH_POWER      // 2031304 but no -ac patch
    if (addrSpaceP->host != NULL)
      flush_inode_pages(addrSpaceP->host);
#endif
  }
#else
  DBGASSERT(addrSpaceP == NULL || addrSpaceP->host == NULL);
#endif /* KXMAP */

  kiobufPoolPut(kiobP);

  /* Success! */
  *kibdPP = kibdHeadP;
  return 0;

errorExit:

  if (kiobP)
    kiobufPoolPut(kiobP);

  /* Unmap and deallocate kiobufs, delete cxiKernelIOBufferDesc_t */
  if (kibdHeadP)
  { 
    kibdHeadP->kibdTotalPages = totalPages;
    deallocKernelIOBufferDesc(kibdHeadP);
  }
  return rc;
}


/* Module load hook.  Creates the two slab caches used by this file (one
   for cxiKernelIOBufferDesc_t, one for cxiBufHeadChunk_t), initializes the
   global descriptor-list lock, zeroes the cxiWaitIO delay counter, and
   sets up the temporary kiobuf pool.  Any failure is reported through
   cxiPanic. */
void KibdModuleInit()
{
  int poolRc;

  TRACE0(TRACE_KSVFS, 1, TRCID_KIBD_INIT,
         "KibdModuleInit called\n");

  /* Slab cache for buffer descriptors: no alignment offset, no flags,
     no constructor or destructor */
  KibdCacheP = kmem_cache_create("kernIOBufDesc",
                                 sizeof(struct cxiKernelIOBufferDesc_t),
                                 0, 0, NULL, NULL);
  if (!KibdCacheP)
    cxiPanic("Cannot create cxiKernelIOBufferDesc_t cache\n");

  /* Lock guarding the global list of top-level descriptors */
  spin_lock_init(&KibdLock);

  /* Slab cache for buffer_head chunks, same options as above */
  BhcCacheP = kmem_cache_create("BufHeadChunk",
                                sizeof(struct cxiBufHeadChunk_t),
                                0, 0, NULL, NULL);
  if (!BhcCacheP)
    cxiPanic("Cannot create cxiBufHeadChunk_t cache\n");

  atomic_set(&cxiWaitIONDelays, 0);

  poolRc = kiobufPoolInit();
  if (poolRc != 0)
    cxiPanic("Cannot allocate kiobuf pool\n");
}


/* Module unload hook.  Tears down, in order: the cxiBufHeadChunk_t slab
   cache, every descriptor still on the global list, the
   cxiKernelIOBufferDesc_t slab cache, and finally the temporary kiobuf
   pool.  A failed cache destroy is traced but otherwise ignored. */
void KibdModuleTerm()
{
  int destroyRc;

  TRACE0(TRACE_KSVFS, 1, TRCID_KIBD_TERM,
         "KibdModuleTerm called\n");

  /* buffer_head chunk cache goes first */
  destroyRc = kmem_cache_destroy(BhcCacheP);
  if (destroyRc != 0)
    TRACE1(TRACE_KSVFS, 1, TRCID_BHC_CACHE_DESTROY,
           "KibdModuleTerm: ERROR! BufHeadChunk cache destroy rc %d",
           destroyRc);

  /* Release every outstanding descriptor before destroying its cache.
   * If any were left allocated the destroy would fail, and the stale
   * cache would still exist when GPFS is loaded again.
   */
  cxiKibdUnpinAll();

  /* Now the descriptor cache itself can be destroyed */
  destroyRc = kmem_cache_destroy(KibdCacheP);
  if (destroyRc != 0)
    TRACE1(TRACE_KSVFS, 1, TRCID_KIBD_CACHE_DESTROY,
           "KibdModuleTerm: ERROR! KernelIOBufferDesc cache destroy rc %d",
           destroyRc);

  /* The kiobuf pool is needed by cxiKibdUnpinAll, so it is torn down last */
  kiobufPoolTerm();
}


/* Create a cxiKernelIOBufferDesc_t object (or list of cxiKernelIOBufferDesc_t
   objects) describing an I/O buffer in the user address space of the
   calling process and link it onto the list of all such objects.  Pins
   the user-level buffer.  The buffer virtual address must be on a page
   boundary.  The length can be arbitrarily large, but must be a multiple
   of the page size.  Returns 0 if successful, non-zero if unsuccessful.
   */
int cxiKibdPin(char* vaddr, int len, struct cxiKernelIOBufferDesc_t** kibdPP)
{
  int nPages;
  struct cxiKernelIOBufferDesc_t* headP;
  struct cxiKernelIOBufferDesc_t* kibdP;
  int rc;

  /* Validate parameters */
  TRACE2(TRACE_KSVFS, 5, TRCID_KIBDPIN_ENTER,
         "cxiKibdPin: vaddr 0x%lX len 0x%X\n",
         vaddr, len);
  DBGASSERT(((IntPtr)vaddr & (PAGE_SIZE-1)) == 0);
  DBGASSERT((len & (PAGE_SIZE-1)) == 0);

  nPages = len / PAGE_SIZE;
  rc = allocKernelIOBufferDesc(vaddr, nPages, &headP);
  if (rc != 0)
    return rc;

  /* Add this cxiKernelIOBufferDesc_t to the global list before returning */
  TRACE1(TRACE_KSVFS, 12, TRCID_KIBDPIN_EXIT,
         "cxiKibdPin exit: returning 0x%lX\n", headP);

  spin_lock(&KibdLock);
  headP->gblNextP = KibdGblHeadP;
  if (KibdGblHeadP != NULL)
    KibdGblHeadP->gblPrevP = headP;
  KibdGblHeadP = headP;
  spin_unlock(&KibdLock);

  *kibdPP = headP;
  return 0;
}


/* Unlink a top-level cxiKernelIOBufferDesc_t from the global list, then
   destroy it together with every descriptor chained behind it, unpinning
   the user-level buffer they describe. */
void cxiKibdUnpin(struct cxiKernelIOBufferDesc_t* kibdP)
{
  struct cxiKernelIOBufferDesc_t* succP;
  struct cxiKernelIOBufferDesc_t* predP;

  /* Splice kibdP out of the doubly-linked global list */
  spin_lock(&KibdLock);
  succP = kibdP->gblNextP;
  predP = kibdP->gblPrevP;

  if (predP == NULL)
    KibdGblHeadP = succP;       /* kibdP was the list head */
  else
    predP->gblNextP = succP;

  if (succP != NULL)
    succP->gblPrevP = predP;
  spin_unlock(&KibdLock);

  /* Unmap the pages and free the descriptor chain, outside the spin lock */
  deallocKernelIOBufferDesc(kibdP);
}


/* Drain the global list: repeatedly detach the first
   cxiKernelIOBufferDesc_t and destroy it, unpinning its underlying
   storage, until the list is empty.  The spin lock is dropped around each
   deallocation. */
void cxiKibdUnpinAll()
{
  struct cxiKernelIOBufferDesc_t* firstP;
  struct cxiKernelIOBufferDesc_t* restP;

  TRACE0(TRACE_KSVFS, 1, TRCID_KIBD_UNPIN_ALL_ENTER,
         "cxiKibdUnpinAll entry\n");

  spin_lock(&KibdLock);
  while ((firstP = KibdGblHeadP) != NULL)
  {
    /* Pop the head of the list */
    restP = firstP->gblNextP;
    if (restP != NULL)
      restP->gblPrevP = NULL;
    KibdGblHeadP = restP;
    spin_unlock(&KibdLock);

    /* Destroy the detached descriptor chain and unpin its pages */
    deallocKernelIOBufferDesc(firstP);

    spin_lock(&KibdLock);
  }
  spin_unlock(&KibdLock);

  TRACE0(TRACE_KSVFS, 1, TRCID_KIBD_UNPIN_ALL_EXIT,
         "cxiKibdUnpinAll exit\n");
}


/* Given a cxiKernelIOBufferDesc_t *kibdP mapping an area of memory, split
   it into two pieces.  The first frontPages pages will belong to *frontPP,
   and the remaining pages will be assigned to *rearPP.  frontPages must be
   strictly less than the initial length of *kibdP.  *kibdP is released by
   this routine, so it must not be used after the call.

   Returns 0 on success, otherwise the code returned by the cxiKibdPin call
   that failed.  If this routine fails (rc not 0), the original *kibdP is
   gone and the caller should not rely on *frontPP or *rearPP; a pointer
   that cxiKibdPin never stored is reported as NULL instead of as an
   uninitialized value. */
int cxiKibdSplit(struct cxiKernelIOBufferDesc_t* kibdP,
                 int frontPages,
                 struct cxiKernelIOBufferDesc_t** frontPP,
                 struct cxiKernelIOBufferDesc_t** rearPP)
{
  char* vaddr;
  int nPages;
  int rc;
  /* Start from NULL: when the front pin fails the rear pin is never
     attempted, yet both pointers are still traced and handed back below. */
  struct cxiKernelIOBufferDesc_t* frontP = NULL;
  struct cxiKernelIOBufferDesc_t* rearP = NULL;

  /* Initial implementation is to free the input (*kibdP) and build two
     new cxiKernelIOBufferDesc_t's from scratch.  The address and length
     must be captured before *kibdP is released. */
  vaddr = kibdP->kibdVaddr;
  nPages = kibdP->kibdTotalPages;
  TRACE4(TRACE_KSVFS, 13, TRCID_KIBD_SPLIT,
         "cxiKibdSplit enter: kibdP 0x%lX vaddr 0x%lX nPages %d "
         "frontPages %d\n", kibdP, vaddr, nPages, frontPages);
  DBGASSERT(frontPages < nPages);

  cxiKibdUnpin(kibdP);

  /* Pin the front piece, and the rear piece only if the front succeeded */
  rc = cxiKibdPin(vaddr, frontPages*PAGE_SIZE, &frontP);
  if (rc == 0)
    rc = cxiKibdPin(vaddr+frontPages*PAGE_SIZE, (nPages-frontPages)*PAGE_SIZE,
                    &rearP);

  /* NOTE(review): if the rear pin fails, the front descriptor stays pinned
     and is still returned through *frontPP -- confirm how callers are
     expected to clean up in that case. */
  *frontPP = frontP;
  *rearPP = rearP;
  TRACE5(TRACE_KSVFS, 13, TRCID_KIBD_SPLIT_EXIT,
         "cxiKibdSplit exit: frontP 0x%lX frontPages %d rearP 0x%lX "
         "rearLen %d rc %d\n", frontP, frontPages, rearP,
         nPages-frontPages, rc);
  return rc;
}


/* Combine two cxiKernelIOBufferDesc_t's *frontP and *rearP that map
   contiguous buffers into a new cxiKernelIOBufferDesc_t **mergedPP.  Both
   *frontP and *rearP are released by this routine, so they must not be
   used after the call.

   Returns the code from cxiKibdPin (0 on success).  If this routine fails
   (rc not 0), the original *frontP and *rearP are gone and the caller
   should not rely on *mergedPP; it is NULL unless cxiKibdPin stored
   something else before failing. */
int cxiKibdMerge(struct cxiKernelIOBufferDesc_t* frontP,
                 struct cxiKernelIOBufferDesc_t* rearP,
                 struct cxiKernelIOBufferDesc_t** mergedPP)
{
  char* vaddr;
  int nPages;
  int rc;
  /* Start from NULL so that the exit trace and *mergedPP never carry an
     uninitialized pointer if cxiKibdPin fails without storing a result. */
  struct cxiKernelIOBufferDesc_t* mergedP = NULL;

  /* Initial implementation is to free the two input cxiKernelIOBufferDesc_t's
     and build a new cxiKernelIOBufferDesc_t from scratch. */
  vaddr = frontP->kibdVaddr;
  nPages = frontP->kibdTotalPages;
  TRACE6(TRACE_KSVFS, 13, TRCID_KIBD_MERGE,
         "cxiKibdMerge enter: frontP 0x%lX vaddr 0x%lX nPages %d rearP 0x%lX "
         "vaddr 0x%lX nPages %d\n",
         frontP, vaddr, nPages, rearP, rearP->kibdVaddr, rearP->kibdTotalPages);

  /* The rear buffer must begin exactly where the front buffer ends */
  DBGASSERT(rearP->kibdVaddr == vaddr+nPages*PAGE_SIZE);

  /* Read the rear page count before the rear descriptor is released */
  cxiKibdUnpin(frontP);
  nPages += rearP->kibdTotalPages;
  cxiKibdUnpin(rearP);

  /* Pin the combined range as a single descriptor */
  rc = cxiKibdPin(vaddr, nPages*PAGE_SIZE, &mergedP);
  TRACE4(TRACE_KSVFS, 13, TRCID_KIBD_MERGE_EXIT,
         "cxiKibdMerge exit: mergedP 0x%lX vaddr 0x%lX nPages %d rc %d\n",
         mergedP, vaddr, nPages, rc);
  *mergedPP = mergedP;
  return rc;
}


/* Attach an I/O buffer to the kernel's virtual address space.  The
   cxiIOBufferAttachment_t returned in *attachP must be used as a parameter of
   most of the other operations on cxiIOBuffer_t's.

   The buffer is expected to be pinned already (pinCount > 0).  Attaching
   adds one to pinCount using a compare-and-swap retry loop, then records
   the buffer's cxiKernelIOBufferDesc_t in attachP->kDescP.  Pinning an
   unpinned buffer on demand (via a pinBuffer callback) is not implemented:
   if pinCount is 0 the debug assertion fires and the count is left
   unchanged. */
void cxiAttachIOBuffer(struct cxiIOBuffer_t* iobP,
                       struct cxiIOBufferAttachment_t* attachP)
{
  int oldPinCount;
  int newPinCount;
  int rc;

  TRACE1(TRACE_KSVFS, 5, TRCID_ATTACH_ENTER,
         "cxiAttachIOBuffer: dataPtr 0x%lX\n", OffsetToDataPtr(iobP,0,0));
  for (;;)
  {
    oldPinCount = iobP->pinCount;

    /* Give newPinCount a defined value on every path; it is traced below
       even when no increment takes place. */
    newPinCount = oldPinCount;

    DBGASSERT(oldPinCount > 0);
    if (oldPinCount == 0)
    {
      /* Buffer is not pinned.  A pinBuffer callback would have to be
         invoked here and the attach retried; that is not implemented, so
         give up on changing the pin count. */
      break;
    }

    /* Try to install oldPinCount+1.  A result of 1 is treated as success;
       anything else means the count changed underneath us, so re-read the
       count and try again. */
    newPinCount = oldPinCount+1;
    rc = compare_and_swap((atomic_p)&iobP->pinCount, &oldPinCount,
                          newPinCount);
    if (rc == 1)
      break;
  }

  /* Once the pin of the buffer succeeds, it must have a
   * cxiKernelIOBufferDesc_t.  Use that as the attachment data.
   */
  DBGASSERT(iobP->kernelIOBufferDescP != NULL);
  attachP->kDescP = iobP->kernelIOBufferDescP;
  TRACE2(TRACE_KSVFS, 11, TRCID_ATTACH_KIBD,
         "cxiAttachIOBuffer: kernelIOBufferDescP 0x%lX newPinCount %d\n",
         iobP->kernelIOBufferDescP, newPinCount);
}


/* Detach a buffer from the kernel's virtual address space.  Drops the pin
   count taken on the I/O buffer for this attachment and invalidates
   *attachP.  An attachment whose kDescP is already NULL is left alone, so
   a repeated detach of the same attachment does not touch the pin
   count. */
void cxiDetachIOBuffer(struct cxiIOBuffer_t* iobP,
                       struct cxiIOBufferAttachment_t* attachP)
{
  TRACE3(TRACE_KSVFS, 5, TRCID_DETACH_KIBD,
         "cxiDetachIOBuffer: dataPtr 0x%lX kDescP 0x%lX oldPinCount %d\n",
         OffsetToDataPtr(iobP,0,0), attachP->kDescP, iobP->pinCount);

  if (attachP->kDescP != NULL)
  {
    /* The attachment must refer to this buffer's descriptor */
    DBGASSERT(attachP->kDescP == iobP->kernelIOBufferDescP);

    /* Give back one pin count; the code asserts that at least two are
       held while an attachment exists */
    DBGASSERT(iobP->pinCount >= 2);
    ATOMIC_ADD(&iobP->pinCount, -1);

    /* Mark the attachment as no longer valid */
    attachP->kDescP = NULL;
  }
}


/* Copy len bytes between the user buffer described by *uioP and I/O buffer
   *iobP, starting at byte offset bufOffset within the I/O buffer.
   toIOBuffer gives the direction of the transfer with respect to the I/O
   buffer.  The copy is done one I/O buffer page at a time.  vkopP is not
   referenced by this implementation.  Returns 0 (EOK) if every cxiUiomove
   call succeeded; otherwise returns the first nonzero code reported by
   cxiUiomove, after which no further pages are copied. */
int cxiUXfer(struct cxiIOBuffer_t* iobP, Boolean toIOBuffer,
             const struct cxiIOBufferAttachment_t* attachP,
             void* vkopP, int bufOffset, int len, struct cxiUio_t* uioP)
{
  struct cxiKernelIOBufferDesc_t* kibdP = iobP->kernelIOBufferDescP;
  struct page * pageP;
  unsigned long kaddr;
  int pageIndex;
  int pageOffset;
  int pageLen;
  int rc = 0;

  TRACE5(TRACE_KSVFS, 5, TRCID_UXFER_LINUX,
         "cxiUXfer: dataPtr 0x%lX kBuf 0x%lX toIOBuf %d offset %d len %d\n",
         OffsetToDataPtr(iobP,0,0), kibdP, toIOBuffer, bufOffset, len);

  /* Sanity checks on the request and on the state of the I/O buffer */
  DBGASSERT(bufOffset >= 0);
  DBGASSERT(bufOffset+len <= iobP->ioBufLen);
  DBGASSERT(attachP->kDescP == iobP->kernelIOBufferDescP);
  DBGASSERT(kibdP->kibdVaddr == OffsetToDataPtr(iobP,0,0));
  DBGASSERT(iobP->ioBufLen/PAGE_SIZE == kibdP->kibdTotalPages);
  DBGASSERT(iobP->pinCount >= 2);

  /* The first page may be entered part of the way through; every later
     page is used from its beginning */
  pageIndex = bufOffset / PAGE_SIZE;
  pageOffset = bufOffset % PAGE_SIZE;
  pageLen = PAGE_SIZE - pageOffset;
  do
  {
    /* Never move more than what is left of the request */
    if (pageLen > len)
      pageLen = len;

    KIBD_GET_PAGE(kibdP, pageIndex, pageP);
    DBGASSERT(pageP != NULL);

    /* Map the page only for the duration of the copy */
    kaddr = (unsigned long)kmap(pageP);
    TRACE4(TRACE_KSVFS, 12, TRCID_UXFER_UIOMOVE,
           "cxiUXfer: uiomove pageIndex %d kaddr 0x%lX pageOffset %d "
           "pageLen %d\n", pageIndex, kaddr, pageOffset, pageLen);

    rc = cxiUiomove((char *)(kaddr + pageOffset), pageLen, toIOBuffer, uioP);
    kunmap(pageP);

    /* Stop at the first failed move */
    if (rc != 0)
      break;

    /* Account for the bytes moved and step to the start of the next page */
    len -= pageLen;
    pageOffset = 0;
    pageLen = PAGE_SIZE;
    pageIndex += 1;
  } while (len > 0);

  return rc;
}


/* Copy len bytes between the contiguous kernel buffer kBufP and I/O buffer
   *iobP, starting at byte offset bufOffset within the I/O buffer.
   toIOBuffer gives the direction of the transfer with respect to the I/O
   buffer: nonzero copies from kBufP into the I/O buffer, zero copies from
   the I/O buffer into kBufP.  The copy is done one I/O buffer page at a
   time.  Always returns 0. */
int cxiKXfer(struct cxiIOBuffer_t* iobP, Boolean toIOBuffer,
             const struct cxiIOBufferAttachment_t* attachP,
             int bufOffset, int len, char* kBufP)
{
  struct cxiKernelIOBufferDesc_t* kibdP = iobP->kernelIOBufferDescP;
  struct page * pageP;
  unsigned long kaddr;
  void* pageDataP;
  int pageIndex;
  int pageOffset;
  int pageLen;

  TRACE6(TRACE_KSVFS, 5, TRCID_KXFER_LINUX,
         "cxiKXfer: dataPtr 0x%lX kBuf 0x%lX toIOBuf %d offset %d len %d "
         "kBufP 0x%lX\n", OffsetToDataPtr(iobP,0,0), kibdP,
         toIOBuffer, bufOffset, len, kBufP);

  /* Sanity checks on the request and on the state of the I/O buffer */
  DBGASSERT(bufOffset >= 0);
  DBGASSERT(bufOffset+len <= iobP->ioBufLen);
  DBGASSERT(attachP->kDescP == iobP->kernelIOBufferDescP);
  DBGASSERT(kibdP->kibdVaddr == OffsetToDataPtr(iobP,0,0));
  DBGASSERT(iobP->ioBufLen/PAGE_SIZE == kibdP->kibdTotalPages);
  DBGASSERT(iobP->pinCount >= 2);

  /* The first page may be entered part of the way through; every later
     page is used from its beginning */
  pageIndex = bufOffset / PAGE_SIZE;
  pageOffset = bufOffset % PAGE_SIZE;
  pageLen = PAGE_SIZE - pageOffset;
  do
  {
    /* Never move more than what is left of the request */
    if (pageLen > len)
      pageLen = len;

    KIBD_GET_PAGE(kibdP, pageIndex, pageP);
    DBGASSERT(pageP != NULL);

    /* Map the page only for the duration of the copy */
    kaddr = (unsigned long)kmap(pageP);
    TRACE5(TRACE_KSVFS, 12, TRCID_KXFER_MEMCPY,
           "cxiKXfer: move kibdP 0x%lX pageIndex %d kaddr 0x%lX "
           "pageOffset %d pageLen %d\n",
           kibdP, pageIndex, kaddr, pageOffset, pageLen);

    pageDataP = (void *)(kaddr + pageOffset);
    if (toIOBuffer)
      memcpy(pageDataP, kBufP, pageLen);
    else
      memcpy(kBufP, pageDataP, pageLen);
    kunmap(pageP);

    /* Account for the bytes moved, then step both the kernel buffer
       cursor and the I/O buffer position to the next page */
    len -= pageLen;
    kBufP += pageLen;
    pageOffset = 0;
    pageLen = PAGE_SIZE;
    pageIndex += 1;
  } while (len > 0);

  return 0;
}


/* Zero len bytes of I/O buffer *iobP, starting at byte offset bufOffset.
   The buffer is cleared one page at a time, mapping each page only while
   it is being written.  Always returns 0. */
int cxiKZero(struct cxiIOBuffer_t* iobP,
             const struct cxiIOBufferAttachment_t* attachP,
             int bufOffset, int len)
{
  struct cxiKernelIOBufferDesc_t* kibdP = iobP->kernelIOBufferDescP;
  struct page * pageP;
  unsigned long kaddr;
  int pageIndex;
  int pageOffset;
  int pageLen;

  TRACE4(TRACE_KSVFS, 5, TRCID_KZERO_LINUX,
         "cxiKZero: dataPtr 0x%lX kBuf 0x%lX offset %d len %d\n",
         OffsetToDataPtr(iobP,0,0), kibdP, bufOffset, len);

  /* Sanity checks on the request and on the state of the I/O buffer */
  DBGASSERT(bufOffset >= 0);
  DBGASSERT(bufOffset+len <= iobP->ioBufLen);
  DBGASSERT(attachP->kDescP == iobP->kernelIOBufferDescP);
  DBGASSERT(kibdP->kibdVaddr == OffsetToDataPtr(iobP,0,0));
  DBGASSERT(iobP->ioBufLen/PAGE_SIZE == kibdP->kibdTotalPages);
  DBGASSERT(iobP->pinCount >= 2);

  /* The first page may be entered part of the way through; every later
     page is cleared from its beginning */
  pageIndex = bufOffset / PAGE_SIZE;
  pageOffset = bufOffset % PAGE_SIZE;
  pageLen = PAGE_SIZE - pageOffset;
  do
  {
    /* Never clear more than what is left of the request */
    if (pageLen > len)
      pageLen = len;

    KIBD_GET_PAGE(kibdP, pageIndex, pageP);
    DBGASSERT(pageP != NULL);

    /* Map the page only for the duration of the memset */
    kaddr = (unsigned long)kmap(pageP);
    TRACE4(TRACE_KSVFS, 12, TRCID_KZERO_MEMSET,
           "cxiKZero: zero pageIndex %d kaddr 0x%lX pageOffset %d pageLen %d\n",
           pageIndex, kaddr, pageOffset, pageLen);
    memset((void *)(kaddr + pageOffset), 0, pageLen);
    kunmap(pageP);

    /* Account for the bytes cleared and step to the start of the next
       page */
    len -= pageLen;
    pageOffset = 0;
    pageLen = PAGE_SIZE;
    pageIndex += 1;
  } while (len > 0);

  return 0;
}


/* Map an I/O buffer so it can be read and written from kernel code
   running in the context of a user thread.  Every page of the buffer is
   kmapped individually, so the resulting addresses need not be contiguous;
   the per-page kernel address and page pointer are recorded in
   *discontigP (userPagePointerArray and osPagePointerArray), and
   mappedLen is set to the length of the I/O buffer.  Because mappings can
   be a scarce resource, each cxiMapDiscontiguousRW call should be matched
   promptly by a cxiUnmapDiscontiguousRW call once access to the I/O buffer
   is no longer needed.  Always returns 0. */
int cxiMapDiscontiguousRW(struct cxiIOBuffer_t* iobP,
                          const struct cxiIOBufferAttachment_t* attachP,
                          struct cxiDiscontiguousDirectoryBuffer_t* discontigP)
{
  /* ?? WARNING: Since this must kmap multiple pages, there is the
     possibility of deadlock if multiple threads are part of the way through
     executing this code, and LAST_PKMAP pages (512 or 1024) have already
     been kmapped.  There needs to be flow control whereby threads reserve
     enough pages to complete all of their kmaps before they begin acquiring
     pages. */
  struct cxiKernelIOBufferDesc_t* kibdP = iobP->kernelIOBufferDescP;
  struct page * pageP;
  unsigned long kaddr;
  int pageIndex;
  int dirIndex;

  /* __CXI_BUFFERS_ARE_CONTIGUOUS is not #defined */

  TRACE3(TRACE_KSVFS, 4, TRCID_MAP_DISCONTIG_ENTER,
         "cxiMapDiscontiguousRW: dataPtr 0x%lX kBufP 0x%lX ioBufLen 0x%X\n",
         OffsetToDataPtr(iobP,0,0), kibdP, iobP->ioBufLen);

  DBGASSERT(attachP->kDescP == iobP->kernelIOBufferDescP);
  DBGASSERT(kibdP->kibdVaddr == OffsetToDataPtr(iobP,0,0));
  DBGASSERT(iobP->pinCount >= 2);

  /* Walk the pages of the buffer until KIBD_GET_PAGE yields no page,
     filling one directory slot per page */
  for (pageIndex = 0, dirIndex = 0; ; pageIndex++, dirIndex++)
  {
    KIBD_GET_PAGE(kibdP, pageIndex, pageP);
    if (pageP == NULL)
      break;

    kaddr = (unsigned long)kmap(pageP);
    TRACE4(TRACE_KSVFS, 12, TRCID_MAP_DISCONTIG_KMAP,
           "cxiMapDiscontiguousRW: pageIndex %d kibdP 0x%lX pageP 0x%lX "
           "kaddr 0x%lX\n", pageIndex, kibdP, pageP, kaddr);

    /* Remember both the mapped address and the page that backs it; the
       page pointer is what the unmap routine hands to kunmap */
    DBGASSERT(dirIndex < MAX_PAGES_PER_DIRBLOCK);
    discontigP->userPagePointerArray[dirIndex] = (char*)kaddr;
    discontigP->osPagePointerArray[dirIndex] = (void*)pageP;
  }

  /* Every page of the I/O buffer should now have a directory entry */
  DBGASSERT((iobP->ioBufLen / PAGE_SIZE) == dirIndex);
  discontigP->mappedLen = iobP->ioBufLen;
  return 0;
}


/* Undo a mapping recorded in *discontigP: kunmap every page that has an
   osPagePointerArray entry, clear both pointer arrays for the mapped
   range, and reset mappedLen to 0. */
void
cxiUnmapDiscontiguousRW(struct cxiIOBuffer_t* iobP,
                        struct cxiDiscontiguousDirectoryBuffer_t* discontigP)
{
  struct page * mappedPageP;
  int slot;

  TRACE4(TRACE_KSVFS, 4, TRCID_UNMAP_DISCONTIG_ENTER,
         "cxiUnmapDiscontiguousRW: dataPtr 0x%lX kBufP 0x%lX ioBufLen 0x%X "
         "mappedLen %d\n", OffsetToDataPtr(iobP,0,0), iobP->kernelIOBufferDescP,
         iobP->ioBufLen, discontigP->mappedLen);

  /* Visit every directory slot covered by mappedLen.  A slot whose
     osPagePointerArray entry is NULL was set up via MapContiguousBuffer,
     which did no kmap, so there is nothing to kunmap for it. */
  slot = 0;
  while (slot < discontigP->mappedLen/DISCONTIG_PAGE_SIZE)
  {
    mappedPageP = (struct page *)discontigP->osPagePointerArray[slot];
    TRACE3(TRACE_KSVFS, 12, TRCID_UNMAP_DISCONTIG_KUNMAP,
           "cxiUnmapDiscontiguousRW: unmap pageIndex %d pageP 0x%lX "
           "kaddr 0x%lX\n", slot, mappedPageP,
           discontigP->userPagePointerArray[slot]);

    if (mappedPageP != NULL)
    {
      kunmap(mappedPageP);
      discontigP->osPagePointerArray[slot] = NULL;
    }
    discontigP->userPagePointerArray[slot] = NULL;
    slot++;
  }
  discontigP->mappedLen = 0;
}

/* Return, in *contigBasePP, an address in kernel memory that holds a
   contiguous read-only view of len bytes of I/O buffer *iobP starting at
   bufOffset.  If the requested range lies within a single page, that page
   is kmapped and the address points into the mapping.  Otherwise a
   temporary buffer is obtained with vmalloc and the data is copied into
   it.  *contigP records which method was used so that
   cxiUnmapContiguousRO can release the right resource.  Returns 0 if
   successful, -ENOMEM if the temporary buffer could not be allocated, or
   the nonzero code from cxiKXfer if the copy failed. */
int cxiMapContiguousRO(struct cxiIOBuffer_t* iobP,
                       const struct cxiIOBufferAttachment_t* attachP,
                       int bufOffset, int len, const char** contigBasePP,
                       struct cxiContiguousBuffer_t* contigP)
{
  struct cxiKernelIOBufferDesc_t* kibdP = iobP->kernelIOBufferDescP;
  struct page * pageP;
  unsigned long kaddr;
  char* copyP;
  int firstPage;
  int lastPage;
  int offsetInPage;
  int rc;

  TRACE4(TRACE_KSVFS, 4, TRCID_MAP_CONTIG_ENTER,
         "cxiMapContiguousRO: dataPtr 0x%lX kBufP 0x%lX bufOffset %d len %d\n",
         OffsetToDataPtr(iobP,0,0), kibdP, bufOffset, len);

  /* Sanity checks on the request and on the state of the I/O buffer */
  DBGASSERT(bufOffset >= 0);
  DBGASSERT(bufOffset+len <= iobP->ioBufLen);
  DBGASSERT(attachP->kDescP == iobP->kernelIOBufferDescP);
  DBGASSERT(kibdP->kibdVaddr == OffsetToDataPtr(iobP,0,0));
  DBGASSERT(iobP->ioBufLen/PAGE_SIZE == kibdP->kibdTotalPages);
  DBGASSERT(iobP->pinCount >= 2);

  /* Work out which pages hold the first and last requested bytes */
  firstPage = bufOffset / PAGE_SIZE;
  offsetInPage = bufOffset % PAGE_SIZE;
  lastPage = (bufOffset+len-1) / PAGE_SIZE;

  if (firstPage != lastPage)
  {
    /* The range crosses a page boundary, so it cannot be exposed through
       a single page mapping.  Copy it into a contiguous temporary buffer
       instead. */
    /* ?? This should use kmalloc if len is less than a page */
    copyP = (char*)vmalloc(len);
    if (copyP == NULL)
      return -ENOMEM;

    rc = cxiKXfer(iobP, CXI_XFER_FROM_IOBUFFER, attachP, bufOffset, len,
                  copyP);
    if (rc != 0)
    {
      vfree((void*)copyP);
      return rc;
    }

    /* A non-NULL mallocedBaseP tells cxiUnmapContiguousRO to vfree */
    *contigBasePP = copyP;
    contigP->mallocedBaseP = copyP;
    contigP->pageP = NULL;
    TRACE1(TRACE_KSVFS, 5, TRCID_MAP_CONTIG_VMALLOC,
           "cxiMapContiguousRO: copied to 0x%lX\n", copyP);
    return 0;
  }

  /* The whole range lives in one page: map that page and hand back the
     address of the requested offset within the mapping */
  KIBD_GET_PAGE(kibdP, firstPage, pageP);
  DBGASSERT(pageP != NULL);
  kaddr = (unsigned long)kmap(pageP);

  /* A NULL mallocedBaseP with pageP set tells cxiUnmapContiguousRO to
     kunmap */
  *contigBasePP = (char*) (kaddr+offsetInPage);
  contigP->mallocedBaseP = NULL;
  contigP->pageP = pageP;
  TRACE2(TRACE_KSVFS, 5, TRCID_MAP_CONTIG_KMAP,
         "cxiMapContiguousRO: mapped pageP 0x%lX at 0x%lX\n",
         pageP, *contigBasePP);
  return 0;
}


/* Release whatever cxiMapContiguousRO obtained for *contigP: kunmap the
   page if a single page was mapped, or vfree the temporary copy if one was
   allocated.  The field that was in use is reset to NULL. */
void cxiUnmapContiguousRO(struct cxiIOBuffer_t* iobP,
                          struct cxiContiguousBuffer_t* contigP)
{
  if (contigP->mallocedBaseP == NULL)
  {
    /* No temporary copy was made, so a page mapping must be outstanding */
    TRACE2(TRACE_KSVFS, 4, TRCID_UNMAP_CONTIG_KUNMAP,
           "cxiUnmapContiguousRO: dataPtr 0x%lX kunmap 0x%lX\n",
           OffsetToDataPtr(iobP,0,0), contigP->pageP);
    DBGASSERT(contigP->pageP != NULL);
    kunmap((struct page *)contigP->pageP);
    contigP->pageP = NULL;
    return;
  }

  /* A temporary copy exists; free it.  No page should be mapped in this
     state. */
  TRACE2(TRACE_KSVFS, 4, TRCID_UNMAP_CONTIG_VFREE,
         "cxiUnmapContiguousRO: dataPtr 0x%lX vfree 0x%lX\n",
         OffsetToDataPtr(iobP,0,0), contigP->mallocedBaseP);
  DBGASSERT(contigP->pageP == NULL);
  vfree((void*)contigP->mallocedBaseP);
  contigP->mallocedBaseP = NULL;
}


/* iodone routine for GPFS buffer_heads.  Records the I/O result in the
   buffer_head, unlocks the buffer (waking up waiters, if any), and then
   decrements the nBHActive count of the cxiBufHeadChunk_t that contains
   the buffer_head.  cxiStartIO installs this routine as b_end_io and
   stores the owning chunk in b_private.

   bhP       buffer_head whose I/O has completed
   uptodate  completion status, passed through to mark_buffer_uptodate */
static void BHioDone(struct buffer_head* bhP, int uptodate)
{
  struct cxiBufHeadChunk_t* bhcP;

  mark_buffer_uptodate(bhP, uptodate);

  /* Fetch the owning chunk before unlocking the buffer */
  bhcP = (struct cxiBufHeadChunk_t*)bhP->b_private;
  unlock_buffer(bhP);

  /* The count is dropped only after the unlock.  cxiWaitIO polls
     nBHActive before freeing the chunk, so the chunk remains allocated
     until this decrement has been performed. */
  atomic_dec(&bhcP->nBHActive);
}


/* Start a read or write of the given sectors from dev.  Data should be
   placed into the I/O buffer beginning at byte offset bufOffset.  Returns
   0 on success, negative values on error.  All of the data to be
   transferred will be in the first cxiKernelIOBufferDesc_t.

   One buffer_head is built and submitted for each page touched by the
   transfer.  The buffer_heads live inside cxiBufHeadChunk_t objects that
   are linked into a circular list.

   kibdHeadP    descriptor of the I/O buffer that supplies the pages
   isWrite      passed to generic_make_request as the request type and
                used to pick which kstat counter is updated
   dev          device to access; converted with cxiDevToKernelDev
   startSector  first sector of the transfer (512-byte units are used for
                all size computations here)
   nSectors     number of sectors to transfer; must be > 0
   bufOffset    byte offset within the I/O buffer; must be >= 0 and is
                asserted to be 512-byte aligned within its page
   bhcHeadPP    on success, receives the head of the chunk list; it is not
                written on failure

   Returns 0 once all buffer_heads have been submitted, or -ENOMEM if a
   chunk could not be allocated (chunks allocated so far are freed and no
   I/O is started). */
int 
cxiStartIO(struct cxiKernelIOBufferDesc_t* kibdHeadP,
           Boolean isWrite, cxiDev_t dev, Int32 startSector, int nSectors,
           int bufOffset, struct cxiBufHeadChunk_t** bhcHeadPP)
{
  int bufEndOffset;
  int nTotalPages;
  struct cxiBufHeadChunk_t* bhcP;
  struct cxiBufHeadChunk_t* bhcHeadP;
  struct cxiBufHeadChunk_t* bhcTailP;
  int nBHsAllocated;
  kdev_t kdev;
  int pageIndex;
  int pageOffset;
  int sectorsThisBH;
  struct buffer_head* bhP;
  struct page* pageP;
  struct cxiBufHeadChunk_t* p;
  struct cxiKernelIOBufferDesc_t* kibdP = kibdHeadP;

  /* Validate parameters */
  TRACE6(TRACE_IO, 4, TRCID_KDOIO_LINUX,
         "cxiStartIO: kBuf 0x%lX isWrite %d dev 0x%X sector %d nSectors %d "
         "offset %d\n", kibdP, isWrite, dev, startSector, nSectors, bufOffset);
  DBGASSERT(kibdP != NULL);
  DBGASSERT(bufOffset >= 0);
  DBGASSERT(nSectors > 0);

  /* Compute the total number of pages spanned by the portion of the
     buffer that will participate in the I/O.  This equals the number
     of buffer_heads that will be used. */
  bufEndOffset = bufOffset + nSectors*512 - 1;
  nTotalPages = (bufEndOffset/PAGE_SIZE) - (bufOffset/PAGE_SIZE) + 1;

  /* Allocate the entire list of buffer_head chunks needed for this I/O */
  bhcP = (struct cxiBufHeadChunk_t*) kmem_cache_alloc(BhcCacheP, SLAB_KERNEL);
  bhcHeadP = bhcP;
  if (bhcP == NULL)
    goto enomem;

  /* The first chunk starts out as a one-element circular list */
  bhcP->bhcNextP = bhcP;
  bhcP->bhcPrevP = bhcP;
  bhcP->nBHUsed = 0;
  atomic_set(&bhcP->nBHActive, 0);
  nBHsAllocated = BUFFER_HEADS_PER_CHUNK;

  /* Keep adding chunks until there is a buffer_head for every page */
  while (nBHsAllocated < nTotalPages)
  {
    bhcP = (struct cxiBufHeadChunk_t*) kmem_cache_alloc(BhcCacheP, SLAB_KERNEL);
    if (bhcP == NULL)
      goto enomem;

    /* Insert the new chunk at the tail of the circular list, i.e. between
       the current tail and the head */
    bhcTailP = bhcHeadP->bhcPrevP;
    bhcP->bhcNextP = bhcHeadP;
    bhcP->bhcPrevP = bhcTailP;
    bhcTailP->bhcNextP = bhcP;
    bhcHeadP->bhcPrevP = bhcP;
    bhcP->nBHUsed = 0;
    atomic_set(&bhcP->nBHActive, 0);
    nBHsAllocated += BUFFER_HEADS_PER_CHUNK;
  }

  /* Convert to kernel version of dev_t */
  kdev = cxiDevToKernelDev(dev);

  /* Build and submit a buffer_head for each page of the current I/O */
  bhcP = bhcHeadP;
  pageIndex = bufOffset / PAGE_SIZE;
  pageOffset = bufOffset % PAGE_SIZE;

  /* The first buffer_head covers only the sectors from pageOffset to the
     end of its page (or fewer, if the whole request is shorter) */
  DBGASSERT(pageOffset%512 == 0);
  sectorsThisBH = MIN((PAGE_SIZE-pageOffset) / 512, nSectors);
  while (nSectors > 0)
  {
    /* Get a buffer_head for the the next page */
    if (bhcP->nBHUsed == BUFFER_HEADS_PER_CHUNK)
    {
      /* Current chunk is full; continue with the next chunk in the list */
      bhcP = bhcP->bhcNextP;
      DBGASSERT(bhcP->nBHUsed == 0);
    }
    bhP = &bhcP->bh[bhcP->nBHUsed];
    bhcP->nBHUsed += 1;

    /* Initialize the new buffer_head */
    memset(bhP, 0, sizeof(*bhP));
    init_waitqueue_head(&bhP->b_wait);

    KIBD_GET_PAGE(kibdP, pageIndex, pageP);
    DBGASSERT(pageP != NULL);

    /* Build and submit the buffer_head for the current page */
    bhP->b_size = sectorsThisBH * 512;
    bhP->b_page = pageP;
#ifndef __64BIT__
    /* For a highmem page, b_data carries only the offset within the page
       rather than a kernel address -- presumably because such a page has
       no permanent kernel mapping (TODO confirm) */
    if (PageHighMem(pageP))
      bhP->b_data = (char *)(0 + pageOffset);
    else
#endif
      bhP->b_data = page_address(pageP) + pageOffset;

    bhP->b_this_page = bhP;
    bhP->b_list = BUF_CLEAN;
    bhP->b_end_io = BHioDone;
    /* BHioDone uses b_private to find the chunk owning this buffer_head */
    bhP->b_private = (void*)bhcP;
    bhP->b_dev = kdev;
    bhP->b_rdev = kdev;
    bhP->b_blocknr = startSector;
    bhP->b_rsector = startSector;
    /* The buffer_head is submitted with BH_Lock set; BHioDone calls
       unlock_buffer on it when the I/O completes */
    bhP->b_state = (1 << BH_Mapped) | (1 << BH_Lock) | (1 << BH_Req) |
                   (1 << BH_Uptodate);
    TRACE3(TRACE_IO, 6, TRCID_KDOIO_LINUX_BH,
           "cxiStartIO: bhP 0x%lX sector %d sectorsThisBH %d\n",
           bhP, startSector, sectorsThisBH);
    /* Count the buffer_head as active before submitting it; BHioDone
       performs the matching decrement */
    atomic_inc(&bhcP->nBHActive);
    generic_make_request(isWrite, bhP);
    /* Add the number of sectors submitted to the kstat paging counters */
    if (isWrite)
      kstat.pgpgout += sectorsThisBH;
    else
      kstat.pgpgin += sectorsThisBH;

    /* Advance to next page */
    startSector += sectorsThisBH;
    nSectors -= sectorsThisBH;
    sectorsThisBH = MIN(nSectors, PAGE_SIZE/512);
    pageIndex += 1;
    pageOffset = 0;
  }

  /* Set success return code and return list of active buffer_heads */
  *bhcHeadPP = bhcHeadP;
  return 0;

enomem:

  /* Free buffer_head chunks allocated so far and return failure */
  if (bhcHeadP != NULL)
  {
    /* Walk the circular list from head to tail.  The next pointer is
       read before the chunk is freed; after the free, p is only compared
       against the saved tail pointer and is not dereferenced. */
    bhcP = bhcHeadP;
    bhcTailP = bhcHeadP->bhcPrevP;
    do
    {
      p = bhcP;
      bhcP = bhcP->bhcNextP;
      kmem_cache_free(BhcCacheP, (void*)p);
    }
    while (p != bhcTailP);
  }
  return -ENOMEM;
}


/* Wait for every buffer_head in the circular chunk list headed by
   bhcHeadP to finish its I/O, freeing each cxiBufHeadChunk_t once it is
   no longer in use.  Returns 0 if all buffer_heads completed up to date,
   or -EIO if any buffer_head had an error. */
static int cxiWaitIO(struct cxiBufHeadChunk_t* bhcHeadP)
{
  int ioStatus = 0;
  int bhIndex;
  struct buffer_head* bhP;
  struct cxiBufHeadChunk_t* chunkP;
  struct cxiBufHeadChunk_t* finishedP;

  DBGASSERT(bhcHeadP != NULL);

  /* Chunks and buffer_heads are visited in the reverse of the order in
     which their I/O was started (tail chunk first, last buffer_head
     first).  Waiting on the most recently started I/O first makes it
     likely that the calling thread only has to sleep once. */
  chunkP = bhcHeadP->bhcPrevP;
  do
  {
    bhIndex = chunkP->nBHUsed;
    while (--bhIndex >= 0)
    {
      bhP = &chunkP->bh[bhIndex];
      TRACE4(TRACE_IO, 12, TRCID_KWAITIO_BH,
             "cxiWaitIO: bhP 0x%lX sector %d size %d state 0x%lX\n",
             bhP, bhP->b_rsector, bhP->b_size, bhP->b_state);
      wait_on_buffer(bhP);
      if (buffer_uptodate(bhP))
        continue;

      /* This buffer_head did not end up to date: report an I/O error,
         but keep waiting for the remaining buffer_heads */
      TRACE4(TRACE_IO, 1, TRCID_KWAITIO_BH_ERR,
           "cxiWaitIO: error bhP 0x%lX sector %d size %d state 0x%lX\n",
           bhP, bhP->b_rsector, bhP->b_size, bhP->b_state);
      ioStatus = -EIO;
    }

    /* Step to the previous chunk before the current one is released */
    finishedP = chunkP;
    chunkP = chunkP->bhcPrevP;

    /* All I/Os in the buffer_heads of *finishedP are complete (the
       BH_Lock bits have all been turned off).  Some completion handlers
       may not have returned from BHioDone yet, though, and could still be
       touching fields inside the chunk.  nBHActive counts the handlers
       that have not finished; while it is nonzero the chunk must not be
       freed.  Delay briefly so that a handler running on another
       processor can complete, and repeat until no handler is using the
       chunk. */
    while (atomic_read(&finishedP->nBHActive) > 0)
    {
      TRACE2(TRACE_IO, 1, TRCID_KWAITIO_BH_BUSY,
             "cxiWaitIO: p 0x%lX waiting for %d I/O completion handlers\n",
             finishedP, atomic_read(&finishedP->nBHActive));
      cxiSleep(10);
      atomic_inc(&cxiWaitIONDelays);
    }
    kmem_cache_free(BhcCacheP, (void*)finishedP);

    /* The head chunk is the last one visited.  finishedP has been freed
       by now; it is only compared here, never dereferenced. */
  } while (finishedP != bhcHeadP);

  return ioStatus;
}


/* Synchronously read or write the given sectors of dev.  The transfer
   uses the I/O buffer described by kibdP, beginning at byte offset
   bufOffset.  The I/O is started with cxiStartIO and, if that succeeds,
   waited for with cxiWaitIO.  Only a sectorSize of 512 is supported
   (asserted).  Returns 0 (EOK) on success, negative values on error.  All
   of the data to be transferred will be in the first
   cxiKernelIOBufferDesc_t. */
int cxiKDoIO(struct cxiKernelIOBufferDesc_t* kibdP,
             Boolean isWrite, cxiDev_t dev, Int32 startSector, int nSectors,
             int sectorSize, int bufOffset)
{
  struct cxiBufHeadChunk_t* chunkListP;
  int status;

  DBGASSERT(sectorSize == 512);

#ifdef KCSTRACE
  /* Record the parameters of the I/O in progress for this task */
  current->kcst_info.data[0] = dev;
  current->kcst_info.data[1] = startSector;
  current->kcst_info.data[2] = nSectors;
#endif

  /* Submit the I/O; wait for it only if the submission succeeded, since
     chunkListP is not set on failure */
  status = cxiStartIO(kibdP, isWrite, dev, startSector, nSectors, bufOffset,
                      &chunkListP);
  if (status == 0)
    status = cxiWaitIO(chunkListP);

#ifdef KCSTRACE
  /* The I/O is over; clear the recorded parameters */
  current->kcst_info.data[0] = 0;
  current->kcst_info.data[1] = 0;
  current->kcst_info.data[2] = 0;
#endif

  return status;
}



/* Set the block size of device devId to its hardware sector size and
   report the disk parameters in *diskInfoP: sectorSize (512 if the
   hardware sector size is reported as 0) and totalSectors (0 if the
   device size is not available from blk_size).  Always returns 0. */
int GetDiskInfoX(cxiDev_t devId, struct cxiDiskInfo_t* diskInfoP)
{
  /* Kernel version of dev_t for this device */
  kdev_t kdev = cxiDevToKernelDev(devId);
  /* Device size in 1K blocks; stays 0 when the size is not defined */
  int n1KBlocks = 0;

  /* Ask the kernel for the hardware sector size; the name of the query
     routine depends on the kernel version */
#if LINUX_KERNEL_VERSION >= 2040312
  diskInfoP->sectorSize = get_hardsect_size(kdev);
#else
  diskInfoP->sectorSize = get_hardblocksize(kdev);
#endif
  /* An unknown sector size is assumed to be 512 */
  if (diskInfoP->sectorSize == 0)
    diskInfoP->sectorSize = 512;

  /* Use the hardware sector size as the blocksize of the device */
  set_blocksize(kdev, diskInfoP->sectorSize);

  /* Convert the device size, if the major number has a size table, from
     1K blocks into sectors */
  if (blk_size[MAJOR(kdev)])
    n1KBlocks = blk_size[MAJOR(kdev)][MINOR(kdev)];
  diskInfoP->totalSectors = (Int64)n1KBlocks * 1024 / diskInfoP->sectorSize;

  TRACE3(TRACE_IO, 2, TRCID_DISKINFO,
         "GetDiskInfo: dev 0x%X sector size %d totalSectors %lld\n",
         devId, diskInfoP->sectorSize, diskInfoP->totalSectors);
#if 0
  printk("VMALLOC_START=0x%lX VMALLOC_END=0x%lX\n",
         VMALLOC_START, VMALLOC_END);
#endif

  return 0;
}
